home *** CD-ROM | disk | FTP | other *** search
/ Skunkware 5 / Skunkware 5.iso / src / Tools / glimpse-2.1 / agrep / bitap.c < prev    next >
C/C++ Source or Header  |  1995-05-16  |  13KB  |  432 lines

  1. /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/* If the pattern is not a simple fixed pattern, then after preprocessing */
/* and generating the masks, the program comes here. Four cases:          */
/* 1. the pattern is a simple regular expression and no errors are        */
/*    allowed: the matching is done here.                                 */
/* 2. the pattern is a simple regular expression and unit-cost errors     */
/*    are allowed: go to asearch().                                       */
/* 3. the pattern is a simple regular expression and the edit cost is     */
/*    not uniform: go to asearch1().                                      */
/* 4. the pattern is a full regular expression: go to re() if             */
/*    M <= SHORTREG (originally noted as M < 14), else go to re1().       */
/* Input parameters: old_D_pat: delimiter pattern;                        */
/* fd: input file descriptor; M: size of pattern; D: number of errors.    */
  13.  
#include <string.h>

#include "agrep.h"
#include "memory.h"
  16.  
  17. extern int CurrentByteOffset;
  18. extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
  19. extern int LIMITOUTPUT;
  20. extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE; 
  21. extern char D_pattern[];
  22. extern int TRUNCATE, DD, S;
  23. extern char Progname[], CurrentFileName[];
  24. extern int num_of_matched;
  25. extern int agrep_initialfd;
  26. extern int EXITONERROR;
  27. extern int agrep_inlen;
  28. extern CHAR *agrep_inbuffer;
  29. extern int agrep_inpointer;
  30. extern CHAR *agrep_outbuffer;
  31. extern int agrep_outlen;
  32. extern int agrep_outpointer;
  33. extern FILE *agrep_finalfp;
  34. extern int errno;
  35.  
  36. extern int NEW_FILE, POST_FILTER;
  37.  
  38. /* bitap dispatches job */
  39.  
  40. bitap(old_D_pat, Pattern, fd, M, D)
  41. char old_D_pat[], *Pattern;  
  42. int fd, M, D;  
  43. {
  44.     char c;  
  45.     register unsigned r1, r2, r3, CMask, i;
  46.     register unsigned end, endpos, r_Init1;
  47.     register unsigned D_Mask;
  48.     int  ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
  49.     int  k;
  50.     CHAR *buffer;
  51.  
  52.     D_length = strlen(old_D_pat);
  53.     for(i=0; i<D_length; i++) if(old_D_pat[i] == '^' || old_D_pat[i] == '$')
  54.         old_D_pat[i] = '\n';
  55.     if (REGEX) { 
  56.         if (D > 4) {
  57.             fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expressions is 4\n", Progname);
  58.             if (!EXITONERROR) {
  59.                 errno = 2;
  60.                 return -1;
  61.             }
  62.             else exit(2);
  63.         }
  64.         if (M <= SHORTREG) { 
  65.             return re(fd, M, D);   /* SUN: need to find a even point */
  66.         }
  67.         else { 
  68.             return re1(fd, M, D);
  69.         }
  70.     }   
  71.     if (D > 0 && JUMP == ON) 
  72.     { 
  73.         return asearch1(old_D_pat, fd, D); 
  74.     }
  75.     if (D > 0) 
  76.     { 
  77.         return asearch(old_D_pat, fd, D); 
  78.     }
  79.     if(I == 0) Init1 = (unsigned)037777777777;
  80.  
  81.     j=0;
  82.  
  83.     r_Init1 = Init1;
  84.     r1 = r2 = r3 = Init[0];
  85.     endpos = D_endpos;
  86.  
  87.     D_Mask = D_endpos;
  88.     for(i=1 ; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;
  89.     D_Mask = ~D_Mask;
  90.     FIRSTROUND = ON;
  91.  
  92. #if    AGREP_POINTER
  93.     if (fd != -1) {
  94. #endif    /*AGREP_POINTER*/
  95.         alloc_buf(fd, &buffer, Max_record+BlockSize+1);
  96.         buffer[Max_record-1] = '\n';
  97.         lasti = Max_record;
  98.         while ((num_read = fill_buf(fd, buffer + Max_record, BlockSize)) > 0)
  99.         {
  100.             i=Max_record; 
  101.             end = Max_record + num_read; 
  102.             if(FIRSTROUND) {  
  103.                 i = Max_record - 1 ;
  104.  
  105.                 if(DELIMITER) {
  106.                     for(k=0; k<D_length; k++) {
  107.                         if(old_D_pat[k] != buffer[Max_record+k]) break;
  108.                     }
  109.                     if(k>=D_length) j--;
  110.                 }
  111.  
  112.                 FIRSTROUND = OFF;  
  113.             }
  114.             if(num_read < BlockSize) {
  115.                 strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
  116.                 end = end + D_length;
  117.                 buffer[end] = '\0';
  118.             }
  119.  
  120.             /* BITAP_PROCESS: the while-loop below */
  121.             while (i < end)
  122.             {
  123.                 c = buffer[i++];
  124.                 CurrentByteOffset ++;
  125.                 CMask = Mask[c];
  126.                 r1 = r_Init1 & r3;
  127.                 r2 = (( r3 >> 1 ) & CMask) | r1;
  128.                 if ( r2 & endpos ) {
  129.                     j++;
  130.                     if (DELIMITER) CurrentByteOffset -= D_length;
  131.                     else CurrentByteOffset -= 1;
  132.                     if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
  133.                     { 
  134.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  135.                             num_of_matched++;
  136.  
  137.                             if (agrep_finalfp != NULL) 
  138.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  139.                             else {
  140.                                 int outindex;
  141.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  142.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  143.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  144.                                 }
  145.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  146.                                     OUTPUT_OVERFLOW;
  147.                                     free_buf(fd, buffer);
  148.                                     return -1;
  149.                                 }
  150.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  151.                                 agrep_outpointer += outindex;
  152.                             }
  153.  
  154.                             free_buf(fd, buffer);
  155.                             NEW_FILE = OFF;
  156.                             return 0; 
  157.                         }
  158.  
  159.                         print_end = i - D_length - 1;
  160.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  161.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} 
  162.                         if ((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) {
  163.                             free_buf(fd, buffer);
  164.                             return 0;    /* done */
  165.                         }
  166.                     }
  167.                     lasti = i - D_length; 
  168.                     TRUNCATE = OFF;
  169.                     r2 = r3 = r1 = Init[0];
  170.                     r1 = r_Init1 & r3;
  171.                     r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  172.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  173.                     else CurrentByteOffset += 1*1;
  174.                 }
  175.                 c = buffer[i++];
  176.                 CurrentByteOffset ++;
  177.                 CMask = Mask[c];
  178.                 r1 = r_Init1 & r2;
  179.                 r3 = (( r2 >> 1 ) & CMask) | r1; 
  180.                 if ( r3 & endpos ) {
  181.                     j++;
  182.                     if (DELIMITER) CurrentByteOffset -= D_length;
  183.                     else CurrentByteOffset -= 1;
  184.                     if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
  185.                     { 
  186.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  187.                             num_of_matched++;
  188.  
  189.                             if (agrep_finalfp != NULL) 
  190.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  191.                             else {
  192.                                 int outindex;
  193.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  194.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  195.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  196.                                 }
  197.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  198.                                     OUTPUT_OVERFLOW;
  199.                                     free_buf(fd, buffer);
  200.                                     return -1;
  201.                                 }
  202.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  203.                                 agrep_outpointer += outindex;
  204.                             }
  205.  
  206.                             free_buf(fd, buffer);
  207.                             NEW_FILE = OFF;
  208.                             return 0; 
  209.                         }
  210.  
  211.                         print_end = i - D_length - 1;
  212.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  213.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
  214.                         if ((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) {
  215.                             free_buf(fd, buffer);
  216.                             return 0;    /* done */
  217.                         }
  218.                     }
  219.                     lasti = i - D_length ;
  220.                     TRUNCATE = OFF;
  221.                     r2 = r3 = r1 = Init[0]; 
  222.                     r1 = r_Init1 & r2;
  223.                     r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  224.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  225.                     else CurrentByteOffset += 1*1;
  226.                 }   
  227.             }
  228.  
  229.             ResidueSize = num_read + Max_record - lasti;
  230.             if(ResidueSize > Max_record) {
  231.                 ResidueSize = Max_record;
  232.                 TRUNCATE = ON;   
  233.             }
  234.             strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
  235.             lasti = Max_record - ResidueSize;
  236.             if(lasti < 0) {
  237.                 lasti = 1;
  238.             } 
  239.             if ((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) {
  240.                 free_buf(fd, buffer);
  241.                 return 0;    /* done */
  242.             }
  243.         }
  244.         free_buf(fd, buffer);
  245.         return 0;
  246. #if    AGREP_POINTER
  247.     }
  248.     else {
  249.         buffer = agrep_inbuffer;
  250.         num_read = agrep_inlen;
  251.         end = num_read;
  252.         /* buffer[end-1] = '\n';*/ /* at end of the text. */
  253.         /* buffer[0] = '\n';*/  /* in front of the  text. */
  254.         i = 0;
  255.         lasti = 1;
  256.  
  257.         if(DELIMITER) {
  258.             for(k=0; k<D_length; k++) {
  259.                 if(old_D_pat[k] != buffer[k]) break;
  260.             }
  261.             if(k>=D_length) j--;
  262.         }
  263.  
  264.             /* An exact copy of the above: BITAP_PROCESS: the while-loop below */
  265.             while (i < end)
  266.             {
  267.                 c = buffer[i++];
  268.                 CurrentByteOffset ++;
  269.                 CMask = Mask[c];
  270.                 r1 = r_Init1 & r3;
  271.                 r2 = (( r3 >> 1 ) & CMask) | r1;
  272.                 if ( r2 & endpos ) {
  273.                     j++;
  274.                     if (DELIMITER) CurrentByteOffset -= D_length;
  275.                     else CurrentByteOffset -= 1;
  276.                     if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
  277.                     { 
  278.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  279.                             num_of_matched++;
  280.  
  281.                             if (agrep_finalfp != NULL) 
  282.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  283.                             else {
  284.                                 int outindex;
  285.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  286.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  287.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  288.                                 }
  289.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  290.                                     OUTPUT_OVERFLOW;
  291.                                     free_buf(fd, buffer);
  292.                                     return -1;
  293.                                 }
  294.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  295.                                 agrep_outpointer += outindex;
  296.                             }
  297.  
  298.                             free_buf(fd, buffer);
  299.                             NEW_FILE = OFF;
  300.                             return 0; 
  301.                         }
  302.  
  303.                         print_end = i - D_length - 1;
  304.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  305.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} 
  306.                         if ((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) {
  307.                             free_buf(fd, buffer);
  308.                             return 0;    /* done */
  309.                         }
  310.                     }
  311.                     lasti = i - D_length; 
  312.                     TRUNCATE = OFF;
  313.                     r2 = r3 = r1 = Init[0];
  314.                     r1 = r_Init1 & r3;
  315.                     r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  316.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  317.                     else CurrentByteOffset += 1*1;
  318.                 }
  319.                 c = buffer[i++];
  320.                 CurrentByteOffset ++;
  321.                 CMask = Mask[c];
  322.                 r1 = r_Init1 & r2;
  323.                 r3 = (( r2 >> 1 ) & CMask) | r1; 
  324.                 if ( r3 & endpos ) {
  325.                     j++;
  326.                     if (DELIMITER) CurrentByteOffset -= D_length;
  327.                     else CurrentByteOffset -= 1;
  328.                     if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
  329.                     { 
  330.                         if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
  331.                             num_of_matched++;
  332.  
  333.                             if (agrep_finalfp != NULL) 
  334.                                 fprintf(agrep_finalfp, "%s\n", CurrentFileName);
  335.                             else {
  336.                                 int outindex;
  337.                                 for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
  338.                                         (CurrentFileName[outindex] != '\0'); outindex++) {
  339.                                     agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
  340.                                 }
  341.                                 if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
  342.                                     OUTPUT_OVERFLOW;
  343.                                     free_buf(fd, buffer);
  344.                                     return -1;
  345.                                 }
  346.                                 else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
  347.                                 agrep_outpointer += outindex;
  348.                             }
  349.  
  350.                             free_buf(fd, buffer);
  351.                             NEW_FILE = OFF;
  352.                             return 0; 
  353.                         }
  354.  
  355.                         print_end = i - D_length - 1;
  356.                         if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
  357.                             if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
  358.                         if ((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) {
  359.                             free_buf(fd, buffer);
  360.                             return 0;    /* done */
  361.                         }
  362.                     }
  363.                     lasti = i - D_length ;
  364.                     TRUNCATE = OFF;
  365.                     r2 = r3 = r1 = Init[0]; 
  366.                     r1 = r_Init1 & r2;
  367.                     r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
  368.                     if (DELIMITER) CurrentByteOffset += 1*D_length;
  369.                     else CurrentByteOffset += 1*1;
  370.                 }   
  371.             }
  372.  
  373.         return 0;
  374.     }
  375. #endif    /*AGREP_POINTER*/
  376. }
  377.  
  378. fill_buf(fd, buf, record_size)
  379. int fd, record_size; 
  380. unsigned char *buf;
  381. {
  382.     int num_read=1;
  383.     int total_read=0;
  384.     extern int glimpse_clientdied;
  385.  
  386.     if (fd >= 0) {
  387.         while(total_read < record_size && num_read > 0) {
  388.             if (glimpse_clientdied) return 0;
  389.             num_read = read(fd, buf+total_read, record_size - total_read);
  390.             total_read = total_read + num_read;
  391.         }
  392.     }
  393. #if    AGREP_POINTER
  394.     else return 0;    /* should not call this function if buffer is a pointer to a user-specified region! */
  395. #else    /*AGREP_POINTER*/
  396.     else {    /* simulate a file */
  397.         total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? (agrep_len - agrep_inpointer) : record_size;
  398.         memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read);
  399.         agrep_inpointer += total_read;
  400.         /* printf("agrep_inpointer %d total_read %d\n", agrep_inpointer, total_read);*/
  401.     }
  402. #endif    /*AGREP_POINTER*/
  403.     if (glimpse_clientdied) return 0;
  404.     return(total_read);
  405. }
  406.  
  407. /*
  408.  * In these functions no allocs/copying is done when
  409.  * fd == -1, i.e., agrep is called to search within memory.
  410.  */
  411.  
  412. alloc_buf(fd, buf, size)
  413.     int fd;
  414.     char **buf;
  415.     int size;
  416. {
  417. #if    AGREP_POINTER
  418.     if (fd != -1)
  419. #endif    /*AGREP_POINTER*/
  420.         *buf = (char *)malloc(size);
  421. }
  422.  
  423. free_buf(fd, buf)
  424.     int fd;
  425.     char *buf;
  426. {
  427. #if    AGREP_POINTER
  428.     if (fd != -1)
  429. #endif    /*AGREP_POINTER*/
  430.         free(buf);
  431. }
  432.